notebook.community

Edit and run



In [1]:

    
import pandas as pd
from collections import OrderedDict
from bokeh.charts import Bar
from bokeh.plotting import output_notebook, show
from bokeh.palettes import brewer
from bokeh.models import LinearAxis, Range1d









    




    
        
        
        
    
        
        BokehJS successfully loaded.



In [24]:

    
output_notebook()



In [2]:

    
import targetsmosh as tm



In [20]:

    
reload(tm)









    Out[20]:





<module 'targetsmosh' from 'targetsmosh.py'>



In [ ]:

    
vcounts2013 = tm.get_vcounts(p, 2013)
tm.summary(vcounts2013, p)



In [4]:

    
p = pd.read_pickle('../CDPdata/sheet12_2014.pkl')



In [4]:

    
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")
scopes12_5 = pd.read_pickle("../CDPdata/2010to2014scopes12fiveyrs.pkl")
companies = scopes12.index.levels[0].tolist()
companies5 = scopes12_5.index.levels[0].tolist()



In [3]:

    
s12_c = pd.read_pickle("../CDPdata/s12_completed.pkl")
s12cos = s12_c.index.levels[0].tolist()



In [11]:

    
targets_yr={}
for yr in range(2010,2015):
    p = pd.read_pickle("../CDPdata/sheet"+str(tm.deets[yr]["summary"]["sheet"])+"_" + str(yr) + ".pkl")
    targets_yr[yr] = tm.get_targets(p, yr)
    targets_yr[yr]["year"] = yr

clean up 2010 manually



In [12]:

    
# oh i need to clean up 2010 for realz
counts2010 = targets_yr[2010]["Organisation"].value_counts()
multiples2010 = counts2010[counts2010>1].index.tolist()
# targets_yr[2010]["target type"].value_counts()



In [13]:

    
targets_yr[2010].drop_duplicates("Organisation",inplace=True)



In [14]:

    
len(multiples2010)
targets_yr[2010] = targets_yr[2010][targets_yr[2010]["Organisation"].isin(s12cos)]
targets_yr[2010].to_pickle("../CDPdata/2010targets_cleaned.pkl")

combine them all



In [15]:

    
alltargets = pd.concat(targets_yr.values())
targets = alltargets[alltargets["Organisation"].isin(s12cos)]
len(targets) # 7861









    Out[15]:





7861



In [16]:

    
targets.to_pickle("../CDPdata/targets_all.pkl")
len(targets.index.value_counts().index) # 2090









    Out[16]:





2090



In [27]:

    
targets.head()









    Out[27]:






  
    
      
      Organisation
      target type
      has absolute
      has intensity
    
    
      year
      
      
      
      
    
  
  
    
      2010
                          Actelios SpA
                                                     NaN
       False
       False
    
    
      2010
          Bharat Petroleum Corporation
                                                     NaN
       False
       False
    
    
      2010
       EDP - Energias de Portugal S.A.
                           Other: GHG Emission Reduction
       False
       False
    
    
      2010
                  Ernst & Young LLP UK
       Other: Proportion of total electricity which i...
       False
       False
    
    
      2010
           Fortune Fashions Industries
                                                     NaN
       False
       False

graph target setting



In [18]:

    
targets.set_index("year", inplace=True)



In [35]:

    
# targets.reset_index(inplace=True)
targets["abs count"] = 1
targets["int count"] = 1
target_yrgs = targets.groupby("year")
# target_yrgs.describe()









    



-c:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
-c:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [37]:

    
t_sums = target_yrgs.sum()
t_sums["abs percent"] = t_sums["has absolute"]/t_sums["abs count"]
t_sums["int percent"] = t_sums["has intensity"]/t_sums["int count"]



In [38]:

    
t_sums









    Out[38]:






  
    
      
      level_0
      index
      has absolute
      has intensity
      abs count
      int count
      abs percent
      int percent
    
    
      year
      
      
      
      
      
      
      
      
    
  
  
    
      2010
         901153
         901153
       411
       325
       1343
       1343
       0.306031
       0.241996
    
    
      2011
        3056738
        3056738
       608
       624
       1471
       1471
       0.413324
       0.424201
    
    
      2012
        5936685
        5936685
       678
       756
       1635
       1635
       0.414679
       0.462385
    
    
      2013
        8884680
        8884680
       749
       832
       1680
       1680
       0.445833
       0.495238
    
    
      2014
       12114474
       12114474
       786
       919
       1732
       1732
       0.453811
       0.530600



In [20]:

    
valuest_summary = OrderedDict()
valuest_summary["% With Absolute Targets"] = abs_percents
valuest_summary["% With Intensity Targets"] = int_percents



In [21]:

    
years = []
for yr in range(2010,2015):
    years.append(str(yr))



In [22]:

    
colors = brewer["Spectral"][3]
bar = Bar(valuest_summary, years, title="Targets Set for Companies That Reported 5 Years", filename="scope12_disclosure.html",
           stacked=False, ylabel="% of Companies",
           xlabel="Year", legend ="bottom_right")



In [25]:

    
show(bar)

make targets charts by country and sector



In [6]:

    
targets5 = pd.read_pickle("../CDPdata/2010to2014targets5yrs.pkl")
orginfos = pd.read_pickle("../CDPdata/orginfos.pkl").reset_index().set_index("Organisation")



In [7]:

    
# combine with orginfos in order to get sector and country numbers
targets5= targets5.reset_index().set_index(["Organisation"])



In [8]:

    
len(set(targets5.index)) # 654









    Out[8]:





654



In [9]:

    
targets5 = targets5.join(orginfos[["Country", "GICS Sector"]])
targets5["GICS Sector"].replace("Banks", "Financials", inplace=True)
targets5 = targets5.reset_index().set_index("Organisation").sort_index()



In [10]:

    
targets5["has absolute"] = targets5["has absolute"].apply(lambda(x): int(x))
targets5["has intensity"] = targets5["has intensity"].apply(lambda(x): int(x))



In [11]:

    
targets5_c = targets5.groupby(["year","Country"])
targets5_s = targets5.groupby(["year","GICS Sector"])
targets5_csums = targets5_c.sum()
targets5_ssums = targets5_s.sum()



In [12]:

    
scopes12_5["GICS Sector"].replace("Banks", "Financials", inplace=True)
scopes12_5 = scopes12_5.reset_index().set_index("Organisation").sort_index()



In [13]:

    
scopes12_5cd = scopes12_5.groupby(["year","Country"]).describe().reset_index()
scopes12_5sd = scopes12_5.groupby(["year","GICS Sector"]).describe().reset_index()



In [15]:

    
scopes12_5sdis = scopes12_5sd[scopes12_5sd["level_2"]=="count"][["year", "GICS Sector","scope1", "scope2"]]
scopes12_5sdis.set_index(["year", "GICS Sector"],inplace=True)
scopes12_5cdis = scopes12_5cd[scopes12_5cd["level_2"]=="count"][["year", "Country","scope1", "scope2"]]
scopes12_5cdis.set_index(["year", "Country"],inplace=True)



In [16]:

    
scopes12_5cdis.head()
targets5_csums.head()
# targets5_csums.loc[2010,"USA"]









    Out[16]:






  
    
      
      
      index
      has absolute
      has intensity
    
    
      year
      Country
      
      
      
    
  
  
    
      2010
      Argentina
        192
       0
       0
    
    
      Australia
       5804
       1
       0
    
    
      Austria
        213
       1
       1
    
    
      Belgium
       4631
       3
       1
    
    
      Bermuda
         90
       0
       1



In [17]:

    
def mergest(s, t):
    st = {}
    for yr in range(2010, 2015):
        st[yr] = s.loc[yr][["scope1","scope2"]]
        tyr = t.loc[yr]
        st[yr] = st[yr].join(tyr[["has absolute", "has intensity"]])
        st[yr].fillna(0,inplace=True)
        st[yr]["year"] = yr
        st[yr].sort("scope1", ascending = 0, inplace=True)
    sts = pd.concat(st.values())
    sts["abspercent"]  = sts["has absolute"]/sts["scope1"]*100.0
    sts["intpercent"]  = sts["has intensity"]/sts["scope1"]*100.0
    return sts



In [26]:

    
scopestargets_c = mergest(scopes12_5cdis, targets5_csums).reset_index().set_index(["year", "Country"]).sort_index()
scopestargets_s = mergest(scopes12_5sdis, targets5_ssums).reset_index().set_index(["year", "GICS Sector"]).sort_index()



In [56]:

    
sectors = scopestargets_s.index.levels[1].tolist()
stvalues_s = OrderedDict()
for i in scopestargets_s.index.levels[0]:
    stvalues_s[str(i)] = scopestargets_s.loc[i]["intpercent"].values



In [72]:

    
stvalues_c = OrderedDict()
for i in scopestargets_c.index.levels[0]:
    yrvalues = []
    for c in countries:
        yrvalues.append(scopestargets_c.loc[i,c]["intpercent"]) 
    stvalues_c[str(i)] = yrvalues



In [69]:

    
countries = scopestargets_c.loc[2010].sort("scope1", ascending =0).index.tolist()[0:9]
# scopestargets_s.loc["Consumer Discretionary"]["abspercent"].values



In [75]:

    
#bar = Bar(stvalues_s, sectors, title="Intensity Targets by Sector", filename="scope12_disclosure.html",
#           stacked=False, ylabel="% of Companies", xlabel="Sector", legend ="top_right")
bar = Bar(stvalues_c, countries, title="Intensity Targets by Country", filename="scope12_disclosure.html",
           stacked=False, ylabel="% of Companies", xlabel="Countries", legend ="top_right")
bar.y_range.end = 100



In [76]:

    
barci = bar
show(bar)

get target details



In [29]:

    
xls = pd.ExcelFile('../CDPdata/Investor CDP 2013_Public Data.xlsx')



In [36]:

    
xls2012 = pd.ExcelFile('../CDPdata/Investor CDP 2012_Public Data.xlsx')



In [52]:

    
p = xls2012.parse(14)
p2 = xls2012.parse(16)



In [56]:

    
p.to_pickle('../CDPdata/2012sheet14.pkl')
p2.to_pickle('../CDPdata/2012sheet16.pkl')



In [55]:

    
# 2014
p.columns[12:20]









    Out[55]:





Index([u'3.1d. Please provide details on your progress against this target made in the reporting year\n\n3.1d. ID', u'3.1d. % complete (time)', u'3.1d. % complete (emissions)', u'3.1d. Comment'], dtype='object')



In [24]:

    
p2[p2.columns[15]].value_counts()[0:10]



In [16]:

    
p2[p2.columns[18]].value_counts()









    Out[16]:





metric tonnes CO2e per unit revenue                             228
metric tonnes CO2e per FTE employee                             168
metric tonnes CO2e per metric tonne of product                  135
metric tonnes CO2e per unit of production                       112
metric tonnes CO2e per square meter                             108
metric tonnes CO2e per megawatt hour (MWh)                       42
grams CO2 per kilometer                                          23
metric tonnes CO2e per unit hour worked                          22
metric tonnes CO2e per square foot                               22
metric tonnes CO2e per unit of service provided                  17
metric tonnes CO2e per vehicle produced                          11
Other: metric tonnes of fugitive CO2e/tonnes CH4 produced by landfill     10
metric tonnes CO2e per kilometer                                  7
metric tonnes CO2e per barrel of oil equivalent (BOE)             7
Other:                                                            7
...
Other: U.S. tons CO2 emitted per MWh                            1
Other: metric tonnes CO2e per million dollars in sales          1
Other: tOther: tonnes of CO2e per tonnes of clinker             1
Other: kilograms of CO2e per thousand gross-ton-mile            1
Other: Crude oil equivalent KL /Net Sales                       1
Other: metric tonnes CO2e per unit GP                           1
Other: 実質生産高原単位指数改善率                                            1
Other: metric tonnes CO2e per unit shipped                      1
Other: mt CO2e/GC, converted from actual goal metric of restaurant kWh per number of sales transactions, or "guest counts" (GC)    1
Other: kg CO2/ton clinker                                       1
Other: grams CO2e per revenue ton mile                          1
Other: metric tons of CO2e avoided per tons of CO2e emitted     1
Other: CO2 per transferred mobile data (kg CO2/GB)              1
Other: tonnes CO2(e) relative to the value of net written premiums    1
Other: ＡＴＭ１台の排出量                                                1
Length: 427, dtype: int64



In [ ]:

	Organisation	target type	has absolute	has intensity
year
2010	Actelios SpA	NaN	False	False
2010	Bharat Petroleum Corporation	NaN	False	False
2010	EDP - Energias de Portugal S.A.	Other: GHG Emission Reduction	False	False
2010	Ernst & Young LLP UK	Other: Proportion of total electricity which i...	False	False
2010	Fortune Fashions Industries	NaN	False	False

	level_0	index	has absolute	has intensity	abs count	int count	abs percent	int percent
year
2010	901153	901153	411	325	1343	1343	0.306031	0.241996
2011	3056738	3056738	608	624	1471	1471	0.413324	0.424201
2012	5936685	5936685	678	756	1635	1635	0.414679	0.462385
2013	8884680	8884680	749	832	1680	1680	0.445833	0.495238
2014	12114474	12114474	786	919	1732	1732	0.453811	0.530600

		index	has absolute	has intensity
year	Country
2010	Argentina	192	0	0
	Australia	5804	1	0
	Austria	213	1	1
	Belgium	4631	3	1
	Bermuda	90	0	1